#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Project: spd-sxmcc
"""
@author: lyndon
@time Created on 2018/12/24 13:59
@desc
"""

import requests
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with every detail-page request.
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"
headers = {"User-Agent": user_agent}  # request headers (a dict) passed to requests.get
url_pre = 'https://ty.5i5j.com'  # prefix prepended to each listing's relative href

# Parse the locally saved listing page. BeautifulSoup consumes the whole file
# in its constructor, so the handle can be closed before any network traffic.
with open(r"D:\iProject\myPython\com\teradata\laccelllatitude\wowojtyrsf.txt", 'r') as f:
    soup = BeautifulSoup(f, "lxml")

lis = soup.select('.listTit')
apartments = []
for li in lis:
    # A title element without a usable link cannot be followed to a detail
    # page; skip it instead of crashing on None.
    anchor = li.select_one("a")
    href = anchor.get('href') if anchor is not None else None
    if not href:
        continue

    apartment = {}
    apartment['name'] = li.text.strip()
    apartment['url'] = href

    url_detail = url_pre + apartment['url']
    # The timeout keeps one unresponsive page from hanging the whole run.
    res = requests.get(url_detail, headers=headers, timeout=30)
    res.encoding = "utf-8"  # force UTF-8 decoding so the text is not garbled
    soup2 = BeautifulSoup(res.text, "lxml")
    # Keep every <li> on the detail page whose text mentions the marker.
    # The test is done on decoded text (unicode literal) rather than on
    # UTF-8 bytes, which raised TypeError on Python 3. The comprehension
    # variable is deliberately not named `li` so the outer loop variable is
    # never shadowed or overwritten.
    apartment['infos'] = [
        item.text.strip()
        for item in soup2.select('li')
        if u'总户数' in item.text
    ]

    print(apartment['name'])
    print(apartment['url'])
    for ifs in apartment['infos']:
        print(ifs)

    # Disabled field extractors, kept for reference:
    # apartment['id'] = int(li.select("a[href*='exchange']")[0]['href'].split('/')[-1].strip())  # transaction id
    # apartment['community'] = li.select("ul.list-info-l a[href*='community']")[0].text.strip()  # community name
    # apartment['communityId'] = int(
    #     li.select("ul.list-info-l a[href*='community']")[0]['href'].split('/')[-1].strip())  # community id
    # # apartment['adress']=li.select('ul.list-info-l a:nth-of-type(2)')[0].text.strip()  # address
    # apartment['type'] = li.select('li.font-balck span:nth-of-type(1)')[0].text.strip()  # layout type
    # apartment['price'] = int(li.select('.list-info-r p')[0].text.rstrip('元/平米').strip())  # unit price, yuan per square metre
    # apartment['totalPrice'] = int(li.select('div.list-info-r h3')[0].text.rstrip('万元').strip())  # total price, in units of 10,000 yuan
    apartments.append(apartment)

    # for i in apartments:
    #     print(i['name'])
    #     print(i['url'])
    #     infos = i['infos']
    #
